rm(list = ls())
library(haven)
library(plm)
library(lmtest)
library(lubridate)
library(stargazer)


setwd("C:/Users/kurtz.61/Dropbox/marcus_oil/replication_code/programs")
replication_data <- read_dta("replication_data.dta")
save(replication_data, file="replication_data.rdata")
load("replication_data.rdata")
View(replication_data)

#generate a linear time counter, and add it to dataset 
time <- replication_data$years-1980
replication_data <- cbind(replication_data, time)

ldvdata <-pdata.frame(replication_data, index = c("cnmr", "years"))


#function to create interaction plots, author information and code below.
##################CREATE INTERACTION PLOT FUNCTION######################

## Two-variable interaction plots in R
## Anton Strezhnev 
## 06/17/2013

## interaction_plot_continuous: Plots the marginal effect for one variable interacted with a continuous moderator variable
## Usage
## Required
# model: linear or generalized linear model object returned by lm() or glm() function
# effect: name of the "effect" variable in the interaction (marginal effect plotted on y-axis) - character string
# moderator: name of the moderating variable in the interaction (plotted on x-axis) - character string
# interaction: name of the interaction variable in the model object - character string
## Optional
# varcov: Variance-Covariance matrix - if default, then taken from the model object using vcov()
# minimum: Smallest value of moderator for which a marginal effect is calculated, if "min" then equal to the minimum value of the moderator in the dataset
# maximum: Largest value of moderator for which a marginal effect is calucated, if "max" then equal to the maximum value of the moderator in the dataset
# num_points: Total number of points for which a marginal effect is calculated - increase to make confidence bounds appear smoother
# conf: Size of confidence interval around coefficient estimates - 0-1, default is .95 (95% confidence)
# mean: Mark the mean mediator value by a vertical red line
# median: Mark the median mediator value by a vertical blue line
# alph: Transparency level of the histogram plot - 0-100, decrease to make the histogram more transparent
# rugplot: Include a rug plot of the mediator values below the figure
# histogram: Include a histogram of mediator values behind the figure - only plotted if minimum="min" and maximum="max"
# title: Title of the plot
# xlabel: Label of the X axis
# ylabel: Label of the Y axis
interaction_plot_continuous <- function(model, effect, moderator, interaction, varcov="default", minimum="min", maximum="max", incr="default", num_points = 50, conf=.95, mean=FALSE, median=FALSE, alph=80, rugplot=T, histogram=T, title="Marginal effects plot", xlabel="Value of moderator", ylabel="Estimated marginal coefficient"){
  
  # Define a function to make colors transparent
  makeTransparent<-function(someColor, alpha=alph){
    newColor<-col2rgb(someColor)
    apply(newColor, 2, function(curcoldata){rgb(red=curcoldata[1], green=curcoldata[2],
                                                blue=curcoldata[3],alpha=alpha, maxColorValue=255)})
  }
  
  # Extract Variance Covariance matrix
  if (varcov == "default"){
    covMat = vcov(model)
  }else{
    covMat = varcov
  }
  
  # Extract the data frame of the model
  mod_frame = model.frame(model)
  
  # Get coefficients of variables
  beta_1 = model$coefficients[[effect]]
  beta_3 = model$coefficients[[interaction]]
  
  # Set range of the moderator variable
  # Minimum
  if (minimum == "min"){
    min_val = min(mod_frame[[moderator]])
  }else{
    min_val = minimum
  }
  # Maximum
  if (maximum == "max"){
    max_val = max(mod_frame[[moderator]])
  }else{
    max_val = maximum
  }
  
  # Check if minimum smaller than maximum
  if (min_val > max_val){
    stop("Error: Minimum moderator value greater than maximum value.")
  }
  
  # Determine intervals between values of the moderator
  if (incr == "default"){
    increment = (max_val - min_val)/(num_points - 1)
  }else{
    increment = incr
  }
  
  # Create list of moderator values at which marginal effect is evaluated
  x_2 <- seq(from=min_val, to=max_val, by=increment)
  
  # Compute marginal effects
  delta_1 = beta_1 + beta_3*x_2
  
  # Compute variances
  var_1 = covMat[effect,effect] + (x_2^2)*covMat[interaction, interaction] + 2*x_2*covMat[effect, interaction]
  
  # Standard errors
  se_1 = sqrt(var_1)
  
  # Upper and lower confidence bounds
  z_score = qnorm(1 - ((1 - conf)/2))
  upper_bound = delta_1 + z_score*se_1
  lower_bound = delta_1 - z_score*se_1
  
  # Determine the bounds of the graphing area
  max_y = max(upper_bound)
  min_y = min(lower_bound)
  
  # Make the histogram color
  hist_col = makeTransparent("grey")
  
  # Initialize plotting window
  plot(x=c(), y=c(), ylim=c(min_y, max_y), xlim=c(min_val, max_val), xlab=xlabel, ylab=ylabel, main=title)
  
  # Plot estimated effects
  lines(y=delta_1, x=x_2)
  lines(y=upper_bound, x=x_2, lty=2)
  lines(y=lower_bound, x=x_2, lty=2)
  
  # Add a dashed horizontal line for zero
  abline(h=0, lty=3)
  
  # Add a vertical line at the mean
  if (mean){
    abline(v = mean(mod_frame[[moderator]]), lty=2, col="red")
  }
  
  # Add a vertical line at the median
  if (median){
    abline(v = median(mod_frame[[moderator]]), lty=3, col="blue")
  }
  
  # Add Rug plot
  if (rugplot){
    rug(mod_frame[[moderator]])
  }
  # Add Histogram (Histogram only plots when minimum and maximum are the min/max of the moderator)
  #  if (histogram & minimum=="min" & maximum=="max"){
  #    par(new=T)
  #    hist(mod_frame[[moderator]], axes=F, xlab="", ylab="",main="", border=hist_col, col=hist_col)
  #  }
}

## interaction_plot_binary: Plots the marginal effect for one variable interacted with a binary variable
## Usage
## Required
# model: linear or generalized linear model object returned by lm() or glm() function
# effect: name of the "effect" variable in the interaction (marginal effect plotted on y-axis) - character string
# moderator: name of the moderating variable in the interaction (plotted on x-axis) - character string - Variable must be binary (0-1)
# interaction: name of the interaction variable in the model object - character string
## Optional
# varcov: Variance-Covariance matrix - if default, then taken from the model object using vcov()
# conf: Size of confidence interval around coefficient estimates - 0-1, default is .95 (95% confidence)
# title: Title of the plot
# xlabel: Label of the X axis
# ylabel: Label of the Y axis
# factor_labels: Labels for each of the two moderator values - default = "0" and "1"
interaction_plot_binary <- function(model, effect, moderator, interaction, varcov="default", conf=.95, title="Marginal effects plot", xlabel="Value of moderator", ylabel="Estimated marginal coefficient", factor_labels=c(0,1)){
  
  # Extract Variance Covariance matrix
  if (varcov == "default"){
    covMat = vcov(model)
  }else{
    covMat = varcov
  }
  
  # Extract the data frame of the model
  mod_frame = model.frame(model)
  
  # Get coefficients of variables
  beta_1 = model$coefficients[[effect]]
  beta_3 = model$coefficients[[interaction]]
  
  # Create list of moderator values at which marginal effect is evaluated
  x_2 <- c(0,1)
  
  # Compute marginal effects
  delta_1 = beta_1 + beta_3*x_2
  
  # Compute variances
  var_1 = covMat[effect,effect] + (x_2^2)*covMat[interaction, interaction] + 2*x_2*covMat[effect, interaction]
  
  # Standard errors
  se_1 = sqrt(var_1)
  
  # Upper and lower confidence bounds
  z_score = qnorm(1 - ((1 - conf)/2))
  upper_bound = delta_1 + z_score*se_1
  lower_bound = delta_1 - z_score*se_1
  
  # Determine the bounds of the graphing area
  max_y = max(upper_bound)
  min_y = min(lower_bound)
  
  # Initialize plotting window
  plot(x=c(), y=c(), ylim=c(min_y, max_y), xlim=c(-.5, 1.5), xlab=xlabel, ylab=ylabel, main=title, xaxt="n")
  
  # Plot points of estimated effects
  points(x=x_2, y=delta_1, pch=16)
  
  # Plot lines of confidence intervals
  lines(x=c(x_2[1], x_2[1]), y=c(upper_bound[1], lower_bound[1]), lty=1)
  points(x=c(x_2[1], x_2[1]), y=c(upper_bound[1], lower_bound[1]), pch=c(25,24), bg="black")
  lines(x=c(x_2[2], x_2[2]), y=c(upper_bound[2], lower_bound[2]), lty=1)
  points(x=c(x_2[2], x_2[2]), y=c(upper_bound[2], lower_bound[2]), pch=c(25,24), bg="black")
  
  # Label the axis
  axis(side=1, at=c(0,1), labels=factor_labels)
  
  # Add a dashed horizontal line for zero
  abline(h=0, lty=3)
  
}


#########################END OF INTERACTION PLOT FUNCTION##################

##Generate results for Table 1. Building up to the base model.

plm3.1 <- plm (polity2_original ~ lag(oil_revenue_pc) + lag(lngdpcap) + 
                 lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
                 lag(industryvalueaddedofgdpnvindtotl)  + time, 
               data=replication_data, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plm3.1)
coeftest (plm3.1, vcov.= vcovHC, method = "white1", type="HC1", cluster="group")
cov3.1 <- vcovHC(plm3.1, method="white1", type="HC1", cluster="group" )
se3.1 <- sqrt(diag(cov3.1))


plm3.2 <- plm (polity2_original ~ lag(oil_revenue_pc) + lag(lngdpcap) + 
                 lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
                 lag(industryvalueaddedofgdpnvindtotl) + lag(spatial_lag) + lag(spatial_lag_rs) + time, 
               data=replication_data, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plm3.2)
coeftest (plm3.2, vcov.= vcovHC, method = "white1", type="HC3", cluster="group")
cov3.2 <- vcovHC(plm3.2, method="white1", type="HC3", cluster="group" )
se3.2 <- sqrt(diag(cov3.2))

plm3.3 <- plm (polity2_original ~ lag(oil_revenue_pc)*lag(wdiffx_sd_0) + lag(lngdpcap) + 
                 lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
                 lag(industryvalueaddedofgdpnvindtotl) + lag(spatial_lag) + lag(spatial_lag_rs) + time, 
               data=replication_data, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plm3.3)
coeftest (plm3.3, vcov.= vcovHC, method = "white1", type="HC3", cluster="group")
cov3.3 <- vcovHC(plm3.3, method="white1", type="HC3", cluster="group" )
se3.3 <- sqrt(diag(cov3.3))



##create table 1 output in latex

stargazer (plm3.1, plm3.2, plm3.3, type="latex", se=list(se3.1, se3.2, se3.3), out="table1.tex",
           column.labels=c("base","spatial","full"), align=TRUE, font.size="small",
           covariate.labels = c("Oil Revenue$_{t-1}$", "Difficulty$_{t-1}$", "Wealth$_{t-1}$", 
                                "Education$_{t-1}$", "Trade$_{t-1}$", "Industry$_{t-1}$",
                                "Aggregate Peer$_{t-1}$", "Average Peer$_{t-1}$", "Time",
                                "Revenue * Difficulty$_{t-1}$"), column.sep.width = "-10pt",
           dep.var.labels = "Democracy", title="Base, Spatial, and Interaction Models", omit.stat="f")


interaction_plot_continuous(plm3.3,effect="lag(oil_revenue_pc)", moderator="lag(wdiffx_sd_0)", interaction="lag(oil_revenue_pc):lag(wdiffx_sd_0)", 
                            varcov=vcovHC(plm3.3, method="white1", type="HC1", cluster="group"),
                            title="Figure 1. Marginal Effect of Oil Income on Regime (Model 3)",
                            xlabel = "Difficulty of Production", ylabel="Effect of Oil Income on Regime")


#replicate plm3.1 above for inclusion in Table 2.

plm3 <- plm (polity2_original ~ lag(oil_revenue_pc)*lag(wdiffx_sd_0) + lag(lngdpcap) + 
               lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
               lag(industryvalueaddedofgdpnvindtotl) + lag(spatial_lag) + lag(spatial_lag_rs) + time, 
             data=replication_data, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plm3)
coeftest (plm3, vcov.= vcovHC, type="HC1", method="white1", cluster="group")
cov3 <- vcovHC(plm3, method="white1", type="HC3", cluster="group" )
se3 <- sqrt(diag(cov3))



##same model as plm3, but with the difficulty components weighted to a [0,1] interval


plmx <- plm (polity2_original ~ lag(oil_revenue_pc)*lag(wdiffx_0) + lag(lngdpcap) + 
               lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
               lag(industryvalueaddedofgdpnvindtotl) + lag(spatial_lag) + lag(spatial_lag_rs) + time, 
             data=replication_data, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plmx)
coeftest (plmx, vcov.= vcovHC, type="HC1", method="white1", cluster="group")
covx <- vcovHC(plmx, method="white1", type="HC3", cluster="group" )
sex <- sqrt(diag(covx))



####full interaction model, no recoding of difficulty measure for non-oil producing country-years (i.e.,
# listwise deletion of those cases.)

plm3not0 <- plm (polity2_original ~ lag(oil_revenue_pc)*lag(wdiffx_sd) + lag(lngdpcap) + 
                   lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
                   lag(industryvalueaddedofgdpnvindtotl) + lag(spatial_lag) + lag(spatial_lag_rs) + time, 
                 data=replication_data, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plm3not0)
coeftest (plm3not0, vcov.= vcovHC, type="HC1", method="white1", cluster="group")
cov3not0 <- vcovHC(plm3not0, method="white1", type="HC3", cluster="group" )
se3not0 <- sqrt(diag(cov3not0))





##Table 2. Various robustness checks: alternative specifications of difficulty.

stargazer (plm3, plm3not0, plmx, type="latex", se=list(se3, se3not0, sex), out="table2.tex",
           column.labels=c("Full","Oil Only","Alt. Difficulty"), align=TRUE, font.size="footnotesize",
           covariate.labels = c("Oil Revenue$_{t-1}$", "Difficulty1$_{t-1}$", "Difficulty2$_{t-1}$", 
                                "Difficulty3$_{t-1}$", "Wealth$_{t-1}$", 
                                "Education$_{t-1}$", "Trade$_{t-1}$", "Industry$_{t-1}$",
                                "Aggregate Peer$_{t-1}$", "Average Peer$_{t-1}$", "Time",
                                "Revenue * Difficulty1$_{t-1}$", "Revenue * Difficulty2$_{t-1}$",
                                "Revenue * Difficulty3$_{t-1}$"), column.sep.width = "-10pt", 
           dep.var.labels = "Regime", title="Robustness: Alternative Approaches to Difficulty",
           omit.stat="f")

#plots of interactions for table 2.

interaction_plot_continuous(plm3not0,effect="lag(oil_revenue_pc)", moderator="lag(wdiffx_sd)", interaction="lag(oil_revenue_pc):lag(wdiffx_sd)", 
                            varcov=vcovHC(plm3not0, method="white1", type="HC1", cluster="group"),
                            title="Figure 2. Marginal Effect of Oil Revenue on Regime (Oil Countries)",
                            xlabel = "Difficulty of Production", ylabel="Effect of Oil Income on Democracy")

interaction_plot_continuous(plmx,effect="lag(oil_revenue_pc)", moderator="lag(wdiffx_0)", interaction="lag(oil_revenue_pc):lag(wdiffx_0)", 
                            varcov=vcovHC(plmx, method="white1", type="HC1", cluster="group"),
                            title="Figure 3. Marginal Effect of Oil Revenue on Regime",
                            xlabel = "Difficulty of Production [0,1]", ylabel="Effect of Oil Income on Democracy")

##same model as plm3, but testing oil production, not oil revenues
plm3b <- plm (polity2_original ~ lag(oil_pc)*lag(wdiffx_sd_0) + lag(lngdpcap) + 
                lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
                lag(industryvalueaddedofgdpnvindtotl) + lag(spatial_lag) + lag(spatial_lag_rs) + time, 
              data=replication_data, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plm3b)
coeftest (plm3b, vcov.= vcovHC, type="HC1", method="white1", cluster="group")
cov3b <- vcovHC(plm3b, method="white1", type="HC3", cluster="group" )
se3b <- sqrt(diag(cov3b))




## lagged dependent variable model

plm3.ldv <- plm (polity2_original ~ lag(polity2_original) + lag(oil_revenue_pc)*lag(wdiffx_sd_0) + lag(lngdpcap) + 
                   lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
                   lag(industryvalueaddedofgdpnvindtotl) + lag(spatial_lag) + lag(spatial_lag_rs) + time, 
                 data=ldvdata, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plm3.ldv)
coeftest (plm3.ldv, vcov.= vcovHC, type="HC1", save="TRUE", method="white1", cluster="group")
cov3.ldv <- vcovHC(plm3.ldv, method="white1", type="HC3", cluster="group" )
se3.ldv <- sqrt(diag(cov3.ldv))

#first-differenced dependent variable 
plm3.fd <- plm (firstdiff ~  + lag(oil_revenue_pc)*lag(wdiffx_sd_0) + lag(lngdpcap) + 
                  lag(schoolenrollmentsecondarygrossse) + lag(tradeofgdpnetrdgnfszs) + 
                  lag(industryvalueaddedofgdpnvindtotl) + lag(spatial_lag) + lag(spatial_lag_rs) , 
                data=ldvdata, model=c("pooling"), index = c("cnmr", "years"), effect="individual")
summary (plm3.fd)
coeftest (plm3.fd, vcov.= vcovHC, type="HC1", save="TRUE", method="white1", cluster="group")
cov3.fd <- vcovHC(plm3.fd, method="white1", type="HC3", cluster="group" )
se3.fd <- sqrt(diag(cov3.fd))


## Table 3 - production, lagged dependent variable, first differences

stargazer (plm3b, plm3.ldv, plm3.fd, type="latex", se=list(se3b, se3.ldv, se3.fd), out="table3.tex",
           column.labels=c("Oil Production","Oil Revenue","Oil Revenue"), align=TRUE, font.size="footnotesize",
           covariate.labels = c("Oil Production$_{t-1}$", "Democracy$_{t-1}$", "Oil Revenue$_{t-1}$",
                                "Difficulty$_{t-1}$", "Wealth$_{t-1}$", 
                                "Education$_{t-1}$", "Trade$_{t-1}$", "Industry$_{t-1}$",
                                "Aggregate Peer$_{t-1}$", "Average Peer$_{t-1}$", "Time",
                                "Production * Difficulty$_{t-1}$", "Revenue * Difficulty$_{t-1}$"),
           dep.var.labels = c("Democracy","$\\Delta$ Democracy"), omit.stat="f",
           title="Resource Booms, Sticky Institutions, or Changes to Regime")




#run the script to create the interaction_plot function, interaction_plots.R
#then create the plot

interaction_plot_continuous(plm3b,effect="lag(oil_pc)", moderator="lag(wdiffx_sd_0)", interaction="lag(oil_pc):lag(wdiffx_sd_0)", 
                            varcov=vcovHC(plm3b, method="white1", type="HC1", cluster="group"),
                            title="Figure 4. Marginal Effect of Oil Production on Regime",
                            xlabel = "Difficulty of Production", ylabel="Effect of Oil Production on Democracy")


interaction_plot_continuous(plm3.ldv,effect="lag(oil_revenue_pc)", moderator="lag(wdiffx_sd_0)", interaction="lag(oil_revenue_pc):lag(wdiffx_sd_0)", 
                            varcov=vcovHC(plm3.ldv, method="white1", type="HC1", cluster="group"),
                            title="Figure 5. Marginal Effect of Oil Income on Regime - Lagged DV",
                            xlabel = "Difficulty of Production", ylabel="Effect of Oil Income on Democracy")


interaction_plot_continuous(plm3.fd,effect="lag(oil_revenue_pc)", moderator="lag(wdiffx_sd_0)", interaction="lag(oil_revenue_pc):lag(wdiffx_sd_0)", 
                            varcov=vcovHC(plm3.fd, method="white1", type="HC1", cluster="group"),
                            title="Figure 6. Marginal Effect of Oil Income on Changes to Regime",
                            xlabel = "Difficulty of Production", ylabel="Effect of Oil Income on Regime Changes")





